Prepared this Sales Analysis for Bloom Daddy, a purveyor of rare, exotic houseplants.
When I was consulting with him, he seemed keen to understand a few facets of the data provided by Etsy on his online business. Namely, he wanted a lens into his business that I outline below:
Below you’ll find a report developed to answer some of these questions. We dive into recent numbers, look at Week-over-Week revenue, dive into an RFM (Recency, Frequency, Monetary) Analysis to segment and strategise, and then outline the top items, groups, and consumers from the data.
In this section, you’ll find all of my work to prepare for the analysis including the R packages I utilize and the data import.
knitr::opts_chunk$set(echo = TRUE)
options(ggplot2.discrete.color = "viridis")
options(ggplot2.continuous.color = "viridis")
options(ggplot2.continuous.fill = "viridis")
options(ggplot2.discrete.fill = "viridis")
options(scipen = 999)
# getwd()
#packages ----
#Workhorse
library(tidyverse)
library(lubridate)
library(rfm)
#Import & Export
library(readxl)
library(writexl)
library(readr)
#Formatting & Visualization
library(ggdist)
library(ggrepel)
library(tidyquant)
library(hrbrthemes)
library(kableExtra)
library(viridisLite)
library(scales)
library(DT) detect_na <- function(data) {
fdat1 <- data %>%
summarise_all(~ sum(!is.na(.)))
fdat2 <- data %>%
summarise_all(~ sum(is.na(.)))
fdat3 <- data %>%
summarise_all(~ sum(is.na(.)) / length(.))
fdat1_2 <- fdat1 %>%
pivot_longer(everything(), names_to = "column_names", values_to = "non_NULL")
fdat2_2 <- fdat2 %>%
pivot_longer(everything(), names_to = "column_names", values_to = "NULL")
fdat3_2 <- fdat3 %>%
pivot_longer(everything(), names_to = "column_names", values_to = "percent_NULL")
fdat1_2 %>%
left_join(fdat2_2, by = c("column_names")) %>%
left_join(fdat3_2, by = c("column_names")) %>%
arrange(desc(percent_NULL)) %>%
kbl(align = "l", format.args = list(big.mark = ",")) %>%
kable_styling(
full_width = F,
bootstrap_options = c("hover", "responsive", "striped"))
}
tablekable <- function(data) {
data %>%
kbl(align = "l") %>%
kable_styling(
full_width = F,
bootstrap_options = c("hover", "responsive", "striped"))
}
tabledata <- function(data) {
data %>%
datatable(filter = "bottom", style = "bootstrap5")
# “bootstrap5”, “bulma”, “dataTables”, “foundation”, “jqueryui”, “semanticui”
}dat_import <- read_excel("etsy + item attributes.xlsx",
sheet = "dat")
dat_import %>% glimpse()## Rows: 1,126
## Columns: 38
## $ `Sale Date` <dttm> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-30, 2…
## $ `Item Description` <chr> "philodendron pink princess highly variegated moth…
## $ `Item Group` <chr> "philodendron", "philodendron", "philodendron", "h…
## $ `Item Name` <chr> "philodendron pink princess", "philodendron pink p…
## $ `Item Maturity` <chr> "s", "s", "s", NA, "s", NA, "s", "s", NA, NA, "s",…
## $ `Item Variegation` <chr> "high", "high", NA, NA, "high", NA, NA, "high", NA…
## $ `Ship Name` <chr> "Alfred F Rebuldela", "Tiffany Wynn", "Tiffany Wyn…
## $ `Buyer ID` <chr> "Deep grain", "Deepgrain", "Deepgrain", "Deepgrain…
## $ Buyer <chr> "Alfred Rebuldela (r8e6q13v5ou330iz)", "Tiffany W …
## $ Quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,…
## $ Price <dbl> 69.0, 69.0, 55.0, 6.0, 69.0, 20.0, 55.0, 69.0, 135…
## $ `Coupon Code` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Coupon Details` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Discount Amount` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Shipping Discount` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Order Shipping` <dbl> 11.5, 11.5, 0.0, 0.0, 11.5, 11.5, 11.5, 11.5, 29.5…
## $ `Order Sales Tax` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `Item Total` <dbl> 69.0, 69.0, 55.0, 6.0, 69.0, 20.0, 55.0, 69.0, 135…
## $ Currency <chr> "USD", "USD", "USD", "USD", "USD", "USD", "USD", "…
## $ `Transaction ID` <dbl> 2817430123, 2817231051, 2817231049, 2817231047, 28…
## $ `Listing ID` <dbl> 1022782347, 1022782347, 1084945150, 965091596, 102…
## $ `Date Paid` <dttm> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-30, 2…
## $ `Date Shipped` <dttm> 2022-01-06, 2022-01-07, 2022-01-07, 2022-01-07, 2…
## $ `Ship Address1` <chr> "PO Box 253", "4205 8th Street NW", "4205 8th Stre…
## $ `Ship Address2` <chr> NA, "Unit 1", "Unit 1", "Unit 1", "Unit 302", NA, …
## $ `Ship City` <chr> "Papaikou", "Washington", "Washington", "Washingto…
## $ `Ship State` <chr> "HI", "DC", "DC", "DC", "IL", "CA", "CA", "WV", "C…
## $ `Ship Zipcode` <chr> "96781", "20011", "20011", "20011", "60616", "9132…
## $ `Ship Country` <chr> "United States", "United States", "United States",…
## $ `Order ID` <dbl> 2328683113, 2329148030, 2329148030, 2329148030, 23…
## $ Variations <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Order Type` <chr> "online", "online", "online", "online", "online", …
## $ `Listings Type` <chr> "listing", "listing", "listing", "listing", "listi…
## $ `Payment Type` <chr> "online_cc", "online_cc", "online_cc", "online_cc"…
## $ `InPerson Discount` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `InPerson Location` <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `VAT Paid by Buyer` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ SKU <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
dat_tidy <- dat_import %>%
rename_with(tolower) %>%
rename_with(~ str_replace_all(
string = .,
pattern = " ",
replacement = "_")) %>%
rename(customer_name = buyer_id) %>%
select(
sale_date, order_id, quantity, price,
item_description, item_group, item_name, item_maturity, item_variegation,
customer_name, #ship_address1, ship_address2,
ship_city, ship_state, ship_zipcode, ship_country
) %>%
mutate(sale_date = lubridate::as_date(sale_date),
order_id = as.character(order_id)) %>%
filter(item_group != "heat pack") %>%
mutate_if(is.character, str_to_title) %>%
mutate_at(c("quantity", "price"), as.integer)
dat_tidy %>% glimpse()## Rows: 1,090
## Columns: 14
## $ sale_date <date> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-30, 2021…
## $ order_id <chr> "2328683113", "2329148030", "2329148030", "2328257683…
## $ quantity <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price <int> 69, 69, 55, 69, 20, 55, 69, 135, 14, 69, 69, 69, 69, …
## $ item_description <chr> "Philodendron Pink Princess Highly Variegated Mother …
## $ item_group <chr> "Philodendron", "Philodendron", "Philodendron", "Phil…
## $ item_name <chr> "Philodendron Pink Princess", "Philodendron Pink Prin…
## $ item_maturity <chr> "S", "S", "S", "S", NA, "S", "S", NA, NA, "S", "S", "…
## $ item_variegation <chr> "High", "High", NA, "High", NA, NA, "High", NA, NA, "…
## $ customer_name <chr> "Deep Grain", "Deepgrain", "Deepgrain", "Deepwing", "…
## $ ship_city <chr> "Papaikou", "Washington", "Washington", "Chicago", "N…
## $ ship_state <chr> "Hi", "Dc", "Dc", "Il", "Ca", "Ca", "Wv", "Ca", "Ca",…
## $ ship_zipcode <chr> "96781", "20011", "20011", "60616", "91325", "92831",…
## $ ship_country <chr> "United States", "United States", "United States", "U…
Here we produce the data that breathes life into the rest of the execution and experimentation in this paper.
Orderlines contains the individual line items making up the transactions.
dat_orderlines <- dat_tidy %>%
arrange(item_name, price, item_maturity, item_variegation, sale_date) %>%
select(sale_date, item_group, item_name,
quantity, price,
customer_name, order_id)
dat_orderlines %>% glimpse()## Rows: 1,090
## Columns: 7
## $ sale_date <date> 2021-08-26, 2021-08-30, 2021-04-02, 2021-06-14, 2021-10…
## $ item_group <chr> "Alocasia", "Alocasia", "Anthurium", "Anthurium", "Anthu…
## $ item_name <chr> "Alocasia Azlanii", "Alocasia Azlanii", "Anthurium 'Radi…
## $ quantity <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price <int> 125, 125, 145, 115, 30, 30, 40, 40, 89, 89, 89, 89, 89, …
## $ customer_name <chr> "Ronchegnac", "Hallowdrifter", "Wheatbrow", "Nicklegrain…
## $ order_id <chr> "2160200271", "2161298804", "2005568728", "2089383487", …
Orders is an aggregate view on Orderlines using the Order ID. It effectively shows what was purchased in that order. I’ve even fashioned a “receipt”.
dat_orders <- dat_orderlines %>%
group_by(sale_date, customer_name, order_id) %>%
mutate(receipt = paste0(item_name)) %>%
summarise(
order_price = sum(price),
order_quantity = sum(quantity),
order_receipt = toString(unique(receipt)),
.groups = "drop")
dat_orders %>% glimpse()## Rows: 1,017
## Columns: 6
## $ sale_date <date> 2021-01-03, 2021-01-13, 2021-01-15, 2021-01-15, 2021-0…
## $ customer_name <chr> "Youngvigor", "Younger", "York", "Youngblood", "Yoakum"…
## $ order_id <chr> "1911958567", "1914566518", "1916803730", "1924880855",…
## $ order_price <int> 7, 89, 89, 21, 89, 130, 89, 89, 130, 115, 89, 89, 89, 1…
## $ order_quantity <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ order_receipt <chr> "Red Venus Fly Trap 'Akai Ryu'", "Anthurium Clarinerviu…
This is where the consumers live and will be the backbone of the RFM analysis.
dat_customers <- dat_orderlines %>%
group_by(customer_name) %>%
mutate(items = paste0(item_name)) %>%
summarise(
total_spent = sum(price),
total_items = sum(quantity),
total_orders = n_distinct(order_id),
first_purchase = min(sale_date),
last_purchase = max(sale_date),
receipt = toString(unique(items))) %>%
ungroup() %>%
mutate(
analysis_date = ymd("20220101"),
tenure = analysis_date - first_purchase,
recency = as.integer( last_purchase - analysis_date ) ,
monetary = total_spent,
frequency = total_orders,
total_spent = total_spent %>% scales::dollar(accuracy = 1)
) %>% select(customer_name, receipt,
recency, frequency, monetary,
contains("total_"),
everything())
dat_customers %>% glimpse() ## Rows: 664
## Columns: 12
## $ customer_name <chr> "Deep Grain", "Deepgrain", "Deepwing", "Deepwoods", "De…
## $ receipt <chr> "Philodendron Pink Princess", "Philodendron Florida Gho…
## $ recency <int> -1, -2, -2, -3, -6, -6, -7, -7, -9, -9, -12, -13, -14, …
## $ frequency <int> 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1…
## $ monetary <int> 69, 124, 89, 190, 14, 69, 138, 69, 69, 138, 224, 135, 2…
## $ total_spent <chr> "$69", "$124", "$89", "$190", "$14", "$69", "$138", "$6…
## $ total_items <int> 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 2, 1…
## $ total_orders <int> 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1…
## $ first_purchase <date> 2021-12-31, 2021-12-30, 2021-12-29, 2021-12-27, 2021-1…
## $ last_purchase <date> 2021-12-31, 2021-12-30, 2021-12-30, 2021-12-29, 2021-1…
## $ analysis_date <date> 2022-01-01, 2022-01-01, 2022-01-01, 2022-01-01, 2022-0…
## $ tenure <drtn> 1 days, 2 days, 3 days, 5 days, 6 days, 6 days, 7 days…
Diving into the recent performance of Bloom Daddy. We use 7, 28, 120, and 365 day intervals as they reduce seasonality seen in other models. To elaborate, many people use 30 days, but this can add volatility to your numbers as you’re accounting for 4 weeks and 2 days vs. a clean 4 weeks. If you experience varied productivity throughout the week, the 30 days can hurt you depending on the seasonality.
dat_past_performance <- dat_orderlines %>%
mutate(
analysis_date = ymd("20220101"),
ddays = sale_date - analysis_date) %>%
mutate(
ddays = ddays %>% as.integer(),
flag_past7 = case_when(ddays >= -7 ~ 1, T ~ 0),
flag_past28 = case_when(ddays >= -28 ~ 1, T ~ 0),
flag_past120 = case_when(ddays >= -120 ~ 1, T ~ 0),
flag_past365 = case_when(ddays >= -365 ~ 1, T ~ 0)
) %>%
select(sale_date, item_name, item_group, price, contains("past"))
dat_pp7 <- dat_past_performance %>%
filter(flag_past7 == 1) %>%
summarise(total_spend = sum(price)) %>%
mutate(phase = "past7")
dat_pp28 <- dat_past_performance %>%
filter(flag_past28 == 1) %>%
summarise(total_spend = sum(price)) %>%
mutate(phase = "past28")
dat_pp120 <- dat_past_performance %>%
filter(flag_past120 == 1) %>%
summarise(total_spend = sum(price)) %>%
mutate(phase = "past120")
dat_pp365 <- dat_past_performance %>%
filter(flag_past365 == 1) %>%
summarise(total_spend = sum(price)) %>%
mutate(phase = "past365")
dat_pp7 %>% rbind(dat_pp28) %>% rbind(dat_pp120) %>% rbind(dat_pp365) %>%
pivot_wider(names_from = phase, values_from = total_spend) %>%
mutate_all(scales::dollar) %>%
tablekable()| past7 | past28 | past120 | past365 |
|---|---|---|---|
| $831 | $3,531 | $13,383 | $85,529 |
viz_timeline <- dat_orderlines %>%
mutate(sale_week = FLOOR_WEEK(sale_date)) %>%
group_by(sale_week) %>%
summarise(total_spend = sum(price)) %>%
ggplot(aes(x = sale_week,
y = total_spend,
fill = total_spend,
text = str_glue("Date: {sale_week}
Revenue: {scales::dollar(total_spend)}"))) +
geom_smooth() +
geom_col() +
theme_ipsum() +
# ylim(1, 365) +
labs(
title = "Sales over Time",
subtitle = "",
caption = "",
x = "Week",
y = "Total Spend",
fill = "Total Spend"#,
# color = "Persona"
) +
viridis::scale_fill_viridis(direction = 1, discrete = FALSE) +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
) plotly::ggplotly(viz_timeline, tooltip = "text")viz_timelineAn RFM Analysis is built on three components:
By using these facets, we can segment our data base and prescribe strategies tailored to the user’s behavior. For instance, a user with high monetary and frequency values, but low recency can be targeted with marketing to intervene and prevent them from lapsing.
dat_rfm3 <- dat_customers %>%
cbind(dat_orderlines %>%
group_by(customer_name) %>%
mutate(items = paste0(item_name)) %>%
summarise(
total_spent = sum(price),
total_items = sum(quantity),
total_orders = n_distinct(order_id),
first_purchase = min(sale_date),
last_purchase = max(sale_date),
receipt = toString(unique(items))) %>%
ungroup() %>%
mutate(
analysis_date = ymd("20220101"),
tenure = analysis_date - first_purchase,
recency = last_purchase - analysis_date,
monetary = total_spent,
frequency = total_orders,
total_spent = total_spent %>% scales::dollar(accuracy = 1)
) %>% select(customer_name, contains("total_"), receipt,
recency, frequency, monetary, tenure,
everything()) %>%
summarise(r = ntile(x = recency, n = 3),
f = ntile(x = frequency, n = 3),
m = ntile(x = monetary, n = 3))) %>%
mutate(rfm_score = paste(r,f,m),
rfm_sum = as.integer(r)+as.integer(f)+as.integer(m)) %>%
select(customer_name, contains("rfm_"),
r,f,m,recency, frequency, monetary,
total_spent, total_orders, total_items,
last_purchase, tenure, everything()
) %>%
arrange(desc(monetary), recency, desc(frequency))Personas use Recency, Frequency, and Monetary in a 3D-fashion to help articulate tactics for the different consumers present. Based on our definition, we have six options total: Champions, Loyal, Recent, High Potential, Needs Nurturing, and Inactive. Unfortunately, we were yet to produce Champions as they need consistently high marks across all three facets.
dat_rfm_persona1 <- dat_rfm3 %>%
mutate(fm = f+m) %>%
mutate(
rfm_persona = case_when(
r == 3 & between(fm,8,10) ~ "Champions",
r == 3 & between(fm,3,7) ~ "Loyal",
r == 3 & between(fm,2,2) ~ "Recent",
r == 2 & between(fm,6,10) ~ "High Potential",
r == 2 & between(fm,2,5) ~ "Needs Nurturing",
r == 1 & between(fm,2,10) ~ "Inactive",
T ~ "")) %>%
select(customer_name, contains("rfm_p"), contains("rfm_"), everything())
dat_rfm_persona1$rfm_persona <- factor(dat_rfm_persona1$rfm_persona,
levels = c("Champions", "Loyal", "Recent",
"High Potential", "Needs Nurturing",
"Inactive"))
dat_rfm_persona1 %>% group_by(rfm_persona) %>%
summarise(n = n()) %>%
mutate(perc_total = n/sum(n)) %>%
ungroup() %>%
mutate(perc_total = scales::percent(perc_total)) %>% tablekable()| rfm_persona | n | perc_total |
|---|---|---|
| Loyal | 124 | 18.7% |
| Recent | 97 | 14.6% |
| High Potential | 82 | 12.3% |
| Needs Nurturing | 139 | 20.9% |
| Inactive | 222 | 33.4% |
viz_dat_m <- dat_rfm_persona1
viz_m <- viz_dat_m %>%
ggplot(aes(x = fct_reorder(rfm_persona, total_spent),
y = monetary,
fill = fct_reorder(rfm_persona, total_spent)
# text = str_glue(
# "Monetary :: {rfm_persona}
# {total_spent} ({m})")
)) +
ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
## custom bandwidth
adjust = 0.5,
## move geom to the right
justification = -.2,
## remove slab interval
.width = 0,
point_colour = NA,
na.rm = T) +
geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
width = .4,
## remove outliers
outlier.color = NA,
outlier.alpha = 0.33,
alpha = 0.66,
na.rm = T) +
# Add dot plots from {ggdist} package
ggdist::stat_dots(aes(color = fct_reorder(rfm_persona, total_spent)),
# geom_jitter(aes(color = fct_reorder(rfm_persona, total_spent)),
# alpha = 0.33,
# width = 0.33,
# height = 0.33,
# na.rm = T) +
# ## orientation to the left
side = "left",
# ## move geom to the left
justification = 1.1,
# ## adjust grouping (binning) of observations
binwidth = .25) +
# jitter = TRUE
#inherit.aes = TRUE
#scale_fill_viridis_d(direction = -1) +
theme_ipsum() +
ylim(0, 600) +
coord_flip() +
labs(
title = "Persona Raincloud: Monetary",
subtitle = "Total Spent vs Persona",
caption = "",
x = "Persona",
y = "Total Spent",
fill = "Persona",
color = "Persona") +
viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
)
viz_mviz_dat_r <- dat_rfm_persona1 %>%
mutate(recency = -1*as.integer(recency))
viz_r <- viz_dat_r %>%
ggplot(aes(x = fct_reorder(rfm_persona, total_spent),
y = recency,
fill = fct_reorder(rfm_persona, total_spent)
# text = str_glue(
# "Monetary :: {rfm_persona}
# {total_spent} ({m})")
)) +
ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
## custom bandwidth
adjust = 0.5,
## move geom to the right
justification = -.2,
## remove slab interval
.width = 0,
point_colour = NA,
na.rm = T) +
geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
width = .4,
## remove outliers
outlier.color = NA,
outlier.alpha = 0.33,
alpha = 0.66,
na.rm = T) +
# Add dot plots from {ggdist} package
ggdist::stat_dots(aes(color = fct_reorder(rfm_persona, total_spent)),
# geom_jitter(aes(color = fct_reorder(rfm_persona, total_spent)),
# alpha = 0.33,
# width = 0.33,
# height = 0.33,
# na.rm = T) +
# ## orientation to the left
side = "left",
# ## move geom to the left
justification = 1.1,
# ## adjust grouping (binning) of observations
binwidth = .25) +
#scale_fill_viridis_d(direction = -1) +
theme_ipsum() +
ylim(1, 365) +
coord_flip() +
labs(
title = "Persona Raincloud: Recency",
subtitle = "Days Since Last Purchase vs Persona",
caption = "",
x = "Persona",
y = "Days Since Purchase",
fill = "Persona",
color = "Persona") +
viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
)
viz_rviz_dat_f <- dat_rfm_persona1
viz_f <- viz_dat_f %>%
ggplot(aes(x = fct_reorder(rfm_persona, total_spent),
y = frequency,
fill = fct_reorder(rfm_persona, total_spent)
# text = str_glue(
# "Monetary :: {rfm_persona}
# {total_spent} ({m})")
)) +
ggdist::stat_halfeye(aes(fct_reorder(rfm_persona, total_spent)),
## custom bandwidth
adjust = 0.5,
## move geom to the right
justification = -.2,
## remove slab interval
.width = 0,
point_colour = NA,
na.rm = T) +
geom_boxplot(aes(color = fct_reorder(rfm_persona, total_spent)),
width = .4,
## remove outliers
outlier.color = NA,
outlier.alpha = 0.33,
alpha = 0.66,
na.rm = T) +
# Add dot plots from {ggdist} package
theme_ipsum() +
ylim(1, 4) +
coord_flip() +
labs(
title = "Persona Raincloud: Frequency",
subtitle = "Total Orders vs Persona",
caption = "",
x = "Persona",
y = "Total Orders",
fill = "Persona",
color = "Persona") +
viridis::scale_fill_viridis(direction = -1, discrete = TRUE) +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
)
viz_fstat_items <- dat_orderlines %>%
group_by(item_group, item_name) %>%
summarise(n_items = n(),
n_orders = n_distinct(order_id),
sum_price = sum(price)
) %>% ungroup() %>%
mutate(
revenue = sum_price %>% scales::dollar(accuracy = 1,big.mark = ","),
perc_total = sum_price/sum(sum_price),
percent_total = perc_total %>% scales::percent(accuracy = 0.01),
rankv = -perc_total
) %>% select(item_group, item_name,
n_items, n_orders, revenue, percent_total,
everything()) %>%
arrange(desc(sum_price),n_orders, n_items) %>%
mutate(rank = rank(rankv))## `summarise()` has grouped output by 'item_group'. You can override using the
## `.groups` argument.
viz_items <- stat_items %>%
filter(rank <= 10) %>%
ggplot(aes(x = item_name %>% fct_reorder(sum_price),
y = sum_price,
fill = sum_price,
text = str_glue("
Name: {item_name}
Revenue: {revenue}
% Revenue: {percent_total}
Items Sold: {n_items}
Total Orders: {n_orders}"))) +
geom_col()+
coord_flip() +
theme_ipsum() +
labs(
title = "Top Performing Items",
subtitle = "Total Spend vs Items",
caption = "",
x = "Items",
y = "Total Spend",
fill = "Total Spend") +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
) viz_items %>% plotly::ggplotly(tooltip = "text")viz_itemsstat_groups <- dat_orderlines %>%
group_by(item_group) %>%
mutate(receipt = paste0(item_name)) %>%
summarise(items_sold = toString(unique(receipt)),
n_items = n(),
n_orders = n_distinct(order_id),
sum_price = sum(price)
) %>% ungroup() %>%
mutate(
revenue = sum_price %>% scales::dollar(accuracy = 1),
perc_total = sum_price/sum(sum_price),
percent_total = perc_total %>% scales::percent(accuracy = 0.01),
rankv = -perc_total
) %>% select(item_group, items_sold,
n_items, n_orders, revenue, percent_total,
everything()) %>%
arrange(desc(sum_price),n_orders, item_group) %>%
mutate(rank = rank(rankv))
viz_groups <- stat_groups %>%
filter(rank <= 10) %>%
ggplot(aes(x = item_group %>% fct_reorder(sum_price),
y = sum_price,
fill = sum_price,
text = str_glue("
Group: {item_group}
Revenue: {revenue}
% Revenue: {percent_total}
Items Sold: {n_items}
Total Orders: {n_orders}
Items Sold: {items_sold}"
))) +
geom_col()+
coord_flip() +
theme_ipsum() +
labs(
title = "Top Performing Item Groups",
subtitle = "Total Spend vs Item Groups",
caption = "",
x = "Item Group",
y = "Total Spend",
fill = "Total Spend") +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
) viz_groups %>% plotly::ggplotly(tooltip = "text")viz_groupsstat_customers <- dat_customers %>%
mutate(rankv = -monetary,
perc_total = monetary/sum(monetary)) %>%
arrange(desc(monetary), desc(frequency), desc(recency)) %>%
mutate(rank = rank(rankv),
percent_total = scales::percent(perc_total))
viz_customers_rank <- stat_customers %>%
filter(rank <= 15) %>%
ggplot(aes(x = customer_name %>% fct_reorder(monetary),
y = monetary,
fill = monetary,
text = str_glue(
"
{customer_name} is ranked no.{as.integer(rank)} with:
Revenue: {total_spent}
% Revenue: {percent_total}
Total Orders: {total_orders}
Total Items: {total_items}
Receipt: {receipt}")
)) +
geom_col() +
coord_flip() +
theme_ipsum() +
labs(
title = "Top Customers (n=15)",
subtitle = "Total Spend vs Customer",
caption = "",
x = "Persona",
y = "Total Spend",
fill = "Total Spend") +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
) viz_customers_rank %>% plotly::ggplotly(tooltip = "text")viz_customers_rankviz_customers_polar <- stat_customers %>%
filter(rank <= 50) %>%
ggplot(aes(x = customer_name %>% fct_reorder(monetary),
y = monetary,
fill = monetary,
text = str_glue(
"
{customer_name} is ranked {as.integer(rank)} with:
Revenue: {total_spent}
% Revenue: {percent_total}
Total Orders: {total_orders}
Total Items: {total_items}
Receipt: {receipt}")
)) +
geom_col() +
coord_polar() +
theme_ipsum() +
labs(
title = "Top Customers in the Arctic (n=50)",
subtitle = "Total Spend vs Customer",
caption = "",
x = "Persona",
y = "Total Spend",
fill = "Total Spend") +
theme(
legend.position = "bottom",
# axis.text = element_blank(),
# axis.text.x = element_text(angle = 90),
# axis.title = element_blank(),
# panel.grid = element_blank(),
plot.margin = margin(.5, .5, .5, .5, "cm")
)
viz_customers_polarwritexl::write_xlsx(x = list(orderlines = dat_orderlines,
orders = dat_orders,
customers = dat_customers),
path = "outbox/Bloom Daddy's Druid Data.xlsx")